"""Scrape Wuhan second-hand housing listings from Lianjia
(https://wh.lianjia.com/ershoufang/) and store them in the h_p MySQL table."""
import datetime
import time

import pymysql
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

import util
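# NOTE: util is a project-local helper; judging from its use below,
# clean_text is assumed to strip whitespace/odd characters from scraped
# strings. A hypothetical minimal version, for reference only:
#     def clean_text(text: str) -> str:
#         return " ".join(text.split())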

ua = UserAgent()

source = "lianjia"
db = pymysql.connect(
    host="172.17.9.105",
    user="cs_platform",
    password="cs_platfm_01",
    database="icrg_test",
)
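# NOTE: the scraped fields contain Chinese text; if rows arrive garbled,
# pass charset="utf8mb4" to pymysql.connect (not set in the original script).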
c = db.cursor()
# Remove today's rows for this source so re-running the script does not
# leave duplicates. Parameterized to avoid building SQL with an f-string.
c.execute(
    "delete from h_p where source=%s and crawl_date=%s",
    (source, datetime.date.today()),
)

sql = (
    "insert into h_p(position,avenue,price,source,crawl_date) values(%s,%s,%s,%s,now())"
)
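# Assumed schema for h_p, inferred from the INSERT/DELETE statements above
# (a sketch only; the real DDL may differ):
#     CREATE TABLE h_p (
#         position   VARCHAR(128),
#         avenue     VARCHAR(128),
#         price      DECIMAL(10, 2),
#         source     VARCHAR(32),
#         crawl_date DATE
#     );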

base_url = "https://wh.lianjia.com/ershoufang/pg%s"
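# e.g. page 3 resolves to https://wh.lianjia.com/ershoufang/pg3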


start = time.time()
# Lianjia paginates listings as /ershoufang/pg1, pg2, ...; walk pages 1-50.
for i in range(1, 51):
    url = base_url % i
    # Send a random Chrome User-Agent on each request to reduce blocking.
    headers = {
        "User-Agent": ua.chrome,
    }
    # timeout added so one stalled request cannot hang the whole crawl
    response = requests.get(url, headers=headers, timeout=10)
    bs = BeautifulSoup(response.text, "html.parser")
    ul_list = bs.find("ul", class_="sellListContent")
    print(f"page {i}, listings found: {ul_list is not None}")
    if ul_list is not None:
        # Reset per page: accumulating across pages and calling executemany
        # each iteration re-inserted earlier pages, duplicating rows.
        datas = []
        # Iterate direct <li> children only; .children also yields bare text
        # nodes, which break find(..., class_=...).
        for li in ul_list.find_all("li", recursive=False):
            position_div = li.find("div", class_="positionInfo")
            if position_div is None:
                continue  # skip promo/ad cards without listing details
            positions = position_div.find_all("a")
            p = positions[0].text  # community name
            a = positions[1].text  # district / area
            price = li.find("div", class_="unitPrice").attrs["data-price"]
            datas.append((util.clean_text(p), util.clean_text(a), price, source))
        c.executemany(sql, datas)
        db.commit()
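    # Added: brief pause between pages to be gentle on the server (the delay
    # value is a guess, not part of the original script).
    time.sleep(1)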
db.close()
end = time.time()
print(f"elapsed: {end - start:.1f} s")
